#Load density values from the CRC handbook (Lide 2004)
den_C <- read.csv("./Data/Density_CRC.csv")

#Create a variable for concentration squared
den_C$Conc2 <- den_C$Conc_WW^2

#Renaming Density column for brevity in R_code
names(den_C)[names(den_C) == "Density_g_cm_cubed"] <- "Density"

###### Fitting polynomial regressions for calculating density from % w/w ######

#Sucrose

sm2 <- lm(Density ~ Conc_WW + Conc2, data = den_C[den_C$Sugar == "Sucrose",])
#Model summary with fitted parameters
(ssm2 <- summary(sm2))

#Predictions from fitted model
den_C$D_pred <- rep(-999, nrow(den_C))

den_C$D_pred[den_C$Sugar == "Sucrose"] <- ssm2$coefficients[1] + 
  ssm2$coefficients[2]*den_C$Conc_WW[den_C$Sugar == "Sucrose"] + 
  ssm2$coefficients[3]*den_C$Conc2[den_C$Sugar == "Sucrose"]

#Glucose

gm2 <- lm(Density ~ Conc_WW + Conc2, data = den_C[den_C$Sugar == "Glucose",])
#Model summary with fitted parameters
(sgm2 <- summary(gm2))

#Predictions from fitted model
den_C$D_pred[den_C$Sugar == "Glucose"] <- sgm2$coefficients[1] + 
  sgm2$coefficients[2]*den_C$Conc_WW[den_C$Sugar == "Glucose"] + 
  sgm2$coefficients[3]*den_C$Conc2[den_C$Sugar == "Glucose"]


#Fructose


fm2 <- lm(Density ~ Conc_WW + Conc2, data = den_C[den_C$Sugar == "Fructose",])
#Model summary with fitted parameters
(sfm2 <- summary(fm2))

#Predictions from fitted model
den_C$D_pred[den_C$Sugar == "Fructose"] <- sfm2$coefficients[1] + 
  sfm2$coefficients[2]*den_C$Conc_WW[den_C$Sugar == "Fructose"] + 
  sfm2$coefficients[3]*den_C$Conc2[den_C$Sugar == "Fructose"]


#### Error associated with predictions

den_C$Error <- abs(((den_C$Density - den_C$D_pred)/den_C$Density) * 100)
tapply(den_C$Error, den_C$Sugar, FUN = function(x) c(mean(x), max(x), median(x)))

#NB. while the density for fructose from The CRC handbook (Lide 2004)
#is only given up to a concentration of 48% w/w, this is sufficient to 
#accurately capture the relationship between concentration and density. For example, 
#Using the fitted model (equation) to predict fructose density for 
#concentrations >48% w/w gives density values in agreement with other 
#published values e.g. Bates 1942 Circular of the National Bureau of Standards C440, 
#Polarimetry, saccharimetry and the sugars. National Bureau of Standards, US Department of Commerce.


### R-squared for models
ssm2$adj.r.squared
sgm2$adj.r.squared
sfm2$adj.r.squared


####### Saving model coefficients for density for solutions in %w/w ######

sa <- c(ssm2$coefficients[1], ssm2$coefficients[2], ssm2$coefficients[3])
ga <- c(sgm2$coefficients[1], sgm2$coefficients[2], sgm2$coefficients[3])
fa <- c(sfm2$coefficients[1], sfm2$coefficients[2], sfm2$coefficients[3])

ww_coeffs <- as.data.frame(rbind(sa, ga, fa), row.names = c("Sucrose", "Glucose", "Fructose"))
colnames(ww_coeffs) <- c("a1", "a2", "a3")

#These values are the values in Table 2 in the manuscript.
write.csv(ww_coeffs, "./Output/Table_2_CRC_ww_density_coeffs.csv")



###### Fitting polynomial regression for calculating density from % w/v (g solute / 100 mL solution) ######

###### First calculate predicted values for % w/v using equation 2 (see main manuscript text for explanation)

#####Creating an empty vector to fill with %w/v values####

den_C$Conc_WV <- rep(-999, nrow(den_C))

#Predicted values for sucrose
den_C$Conc_WV[den_C$Sugar == "Sucrose"] <- with(den_C[den_C$Sugar == "Sucrose",],
                                                (ssm2$coefficients[1] + ssm2$coefficients[2]*Conc_WW + ssm2$coefficients[3]*Conc_WW^2) * Conc_WW)
#Predicted values for glucose
den_C$Conc_WV[den_C$Sugar == "Glucose"] <- with(den_C[den_C$Sugar == "Glucose",],
                                                (sgm2$coefficients[1] + sgm2$coefficients[2]*Conc_WW + sgm2$coefficients[3]*Conc_WW^2) * Conc_WW)
#Predicted values for fructose
den_C$Conc_WV[den_C$Sugar == "Fructose"] <- with(den_C[den_C$Sugar == "Fructose",],
                                                 (sfm2$coefficients[1] + sfm2$coefficients[2]*Conc_WW + sfm2$coefficients[3]*Conc_WW^2) * Conc_WW)

#Creating a vector with Concentration in w/v squared.
den_C$Conc_WV2 <- den_C$Conc_WV^2

#Sucrose
smv2 <- lm(Density ~ Conc_WV + Conc_WV2, data = den_C[den_C$Sugar == "Sucrose",])
#Model summary with fitted parameters

(ssmv2 <- summary(smv2))

den_C$D_pred_WV <- rep(-999, nrow(den_C))

#Predictions from fitted model
den_C$D_pred_WV[den_C$Sugar == "Sucrose"] <- ssmv2$coefficients[1] + 
  ssmv2$coefficients[2]*den_C$Conc_WV[den_C$Sugar == "Sucrose"] + 
  ssmv2$coefficients[3]*den_C$Conc_WV2[den_C$Sugar == "Sucrose"]

#Glucose

gmv2 <- lm(Density ~ Conc_WV + Conc_WV2, data = den_C[den_C$Sugar == "Glucose",])
#Model summary with fitted parameters
(sgmv2 <- summary(gmv2))

#Predictions from fitted model
den_C$D_pred_WV[den_C$Sugar == "Glucose"] <- sgmv2$coefficients[1] + 
  sgmv2$coefficients[2]*den_C$Conc_WV[den_C$Sugar == "Glucose"] + 
  sgmv2$coefficients[3]*den_C$Conc_WV2[den_C$Sugar == "Glucose"]



#Fructose


fmv2 <- lm(Density ~ Conc_WV + Conc_WV2, data = den_C[den_C$Sugar == "Fructose",])
#Model summary with fitted parameters
(sfmv2 <- summary(fmv2))

#Predictions from fitted model
den_C$D_pred_WV[den_C$Sugar == "Fructose"] <- sfmv2$coefficients[1] + 
  sfmv2$coefficients[2]*den_C$Conc_WV[den_C$Sugar == "Fructose"] + 
  sfmv2$coefficients[3]*den_C$Conc_WV2[den_C$Sugar == "Fructose"]


#### Error

den_C$Error_WV <- abs(((den_C$Density - den_C$D_pred_WV)/den_C$Density) * 100)
tapply(den_C$Error_WV, den_C$Sugar, FUN = function(x) c(mean(x), max(x), median(x)))


### R-squared for models
ssmv2$adj.r.squared
sgmv2$adj.r.squared
sfmv2$adj.r.squared


####### Saving model coefficients for predicting density of solutions in %w/v ######

sb <- c(ssmv2$coefficients[1], ssmv2$coefficients[2], ssmv2$coefficients[3])
gb <- c(sgmv2$coefficients[1], sgmv2$coefficients[2], sgmv2$coefficients[3])
fb <- c(sfmv2$coefficients[1], sfmv2$coefficients[2], sfmv2$coefficients[3])

wv_coeffs <- as.data.frame(rbind(sb, gb, fb), row.names = c("Sucrose", "Glucose", "Fructose"))
colnames(wv_coeffs) <- c("b1", "b2", "b3")

#These values are the values in Table 3 in the manuscript.
write.csv(wv_coeffs, "./Output/Table_3_CRC_wv_density_coeffs.csv")

